* Session setup: empty the data and matrices in memory, then set size limits.
clear
clear matrix
* NOTE(review): this value is stored as the permanent default, but for the
* current session it is replaced by "set matsize 800" two lines below.
* Confirm which of the two values is actually wanted.
set matsize 11000, permanently
set maxvar 32767 
set matsize 800
********************************************************************************

* ------------------------------------------------------------------
* Enrolled students (level 1 and 2) from MIUR, aggregated to year x area.
* (see "enrolled_students_from_miur.do" for details on how
*  Immatricolati_Provenienza_Finale.dta has been obtained)
* Output: imm.dta, one row per (anno, area_mixed), with the count in "enrolled".
* ------------------------------------------------------------------
use "$path/Immatricolati_Provenienza_Finale.dta", clear
collapse (sum) TOTALE, by(id_ateneo anno COD_AreaDidattica)

* Map COD_AreaDidattica onto area_mixed. Each quoted pair reads "code area";
* codes that are not listed leave area_mixed missing.
gen area_mixed=.
foreach pair in "1 1" "2 2" "3 3" "4 4" "15 4" "7 5" "6 6" "5 7" ///
                "11 8" "12 8" "13 9" "14 9" "10 10" "8 11" "9 12" "16 12" {
    gettoken cod target : pair
    replace area_mixed=`target' if COD_AreaDidattica==`cod'
}
drop if COD_AreaDidattica==16 /* drops defence and military sciences */
compress

* Period index 1-5 built from anno. The highest period is tested first so
* that it wins wherever two windows would both be true, exactly as a
* sequence of replace statements in ascending order would behave.
gen period = cond(anno>2006 & anno<2012, 5, ///
             cond(anno>2002 & anno<2007, 4, ///
             cond(anno>1998 & anno<2003, 3, ///
             cond(anno>1994 & anno<1999, 2, ///
             cond(anno>1989 & anno<1995, 1, .)))))
* Diagnostic cross-tab only; period is not kept by the collapse below.
tab anno period, nom

* data availability for enrolled students data is between 2002 and 2011 only
collapse (sum) TOTALE, by(anno area_mixed)
rename TOTALE enrolled
save imm, replace

* ------------------------------------------------------------------
* import data on # of retirees
* From the professor-level panel this section writes three files:
*   tmp_retired.dta  exits per ssd-year, one column per rank (ric/ass/ord)
*   tmp_N.dta        head counts per ssd-year, one column per rank
*   k.dta            promotion counts and "south" shares per ssd-year
* ------------------------------------------------------------------
use "$path/people.dta", clear
xtset id_prof anno

* exit: first flag the year in which fascia turns missing after a non-missing
* year, then move the flag one year back (F.) onto the preceding row; flags
* that still sit on a row with missing fascia are set to missing.
gen exit=(fascia==. & L.fascia!=.)
replace exit=F.exit
replace exit=. if exit==1 & fascia==.
gen tmp=1

* Map areacun onto area_mixed. Each quoted pair reads "areacun area_mixed".
gen area_mixed=.
foreach pair in "1 1" "2 1" "3 2" "4 3" "5 3" "6 4" "7 5" "8 6" ///
                "9 7" "10 8" "11 9" "12 10" "13 11" "14 12" {
    gettoken cun target : pair
    replace area_mixed=`target' if areacun==`cun'
}

* Head count of each ssd-year-rank cell, attached to every row of the cell.
bys ssd anno fascia: egen N=total(tmp)

* Suffixes used for fascia 1, 2, 3 in the wide files below.
local rank_tags ric ass ord

* --- exits by ssd-year, reshaped to one column per rank ---
preserve
collapse (sum) tmp (mean) area_mixed if exit==1, by(ssd anno fascia)
drop if ssd==""
rename tmp retiree
reshape wide retiree, i(ssd anno) j(fascia)
forvalues f=1/3 {
    local tag : word `f' of `rank_tags'
    * cells created by the reshape with no exits count as zero
    replace retiree`f'=0 if retiree`f'==.
    rename retiree`f' retiree`tag'
}
save tmp_retired, replace
restore

* --- head counts by ssd-year, reshaped to one column per rank ---
preserve
collapse (mean) area_mixed N, by(ssd anno fascia)
drop if ssd==""
reshape wide N, i(ssd anno) j(fascia)
forvalues f=1/3 {
    local tag : word `f' of `rank_tags'
    rename N`f' N`tag'
}
save tmp_N, replace
restore

* --- promotion counts and south indicators by ssd-year ---
drop tmp
gen tmp=1
gen tmpass=fascia==2
gen tmpord=fascia==3
gen south=inlist(region,1,2,3,4,11,13,14,15)
gen southass=(south==1 & fascia==2)
gen southord=(south==1 & fascia==3)
collapse (sum) newass ricass neword ricord assord south tmp* southass southord (mean) area_mixed, by(anno ssd)
gen kass=newass+ricass
gen kord=neword+ricord+assord
* shares of "south" rows: overall, among fascia 2, among fascia 3
gen r_southT=south/tmp
bys anno: gen r_south2=southass/tmpass
bys anno: gen r_south3=southord/tmpord
drop newass ricass neword ricord assord
save k, replace

clear

* ------------------------------------------------------------------
* Covariates averaged by ssd and period, taken from regFinal.
* Output: x.dta, keyed by (period, ssd) with ssd stored as a string.
* ------------------------------------------------------------------
use regFinal, clear

* Map areacun onto area_mixed. Each quoted pair reads "areacun area_mixed".
gen area_mixed=.
foreach pair in "1 1" "2 1" "3 2" "4 3" "5 3" "6 4" "7 5" "8 6" ///
                "9 7" "10 8" "11 9" "12 10" "13 11" "14 12" {
    gettoken cun target : pair
    replace area_mixed=`target' if areacun==`cun'
}

* NOTE(review): the decode below needs ssdcodefinal to survive this collapse,
* so "ssd" in by() presumably resolves to ssdcodefinal through variable-name
* abbreviation - confirm regFinal has no variable literally named ssd.
collapse (mean) mM* avage av2age av3age shfemale sh2female sh3female area_mixed, by(period ssd)

* Replace the labelled numeric code with its string label, named ssd.
decode ssdcodefinal, generate(ssd_stringa)
rename ssd_stringa ssd
drop ssdcodefinal
save x, replace

* ------------------------------------------------------------------
* Assemble the ssd-by-year panel: k + tmp_retired + tmp_N + imm + x + top10_new,
* then build the ratios and one-year lags used by the by-year regressions.
* ------------------------------------------------------------------
use k, clear
merge 1:1 anno ssd using tmp_retired
drop _merge
merge 1:1 anno ssd using tmp_N
drop _merge
* Key written out in full (previously the abbreviation "area") so the merge
* does not depend on variable-name abbreviation being enabled.
merge m:1 anno area_mixed using imm
drop _merge

* Period index 1-5 from anno, needed as merge key for x.
gen period=.
replace period=1 if anno<1995 & anno>1989
replace period=2 if anno<1999 & anno>1994
replace period=3 if anno<2003 & anno>1998
replace period=4 if anno<2007 & anno>2002
replace period=5 if anno<2012 & anno>2006
merge m:1 period ssd using x
* rows that exist only in x carry no year
drop if anno==.

gen reform=anno>2009 & anno<2012
encode ssd, gen(sd)
xtset sd anno

* Enrolment share of the area within each year.
* FIX: the totals were previously built with "gen ... = sum(enrolled)", which
* in Stata is a RUNNING sum. Under bysort the order of rows inside a group is
* arbitrary, so every row received a different partial sum and the resulting
* ratio was not reproducible. egen total() assigns the group total to all rows.
* Stored as double because summed head counts can exceed float precision.
* NOTE(review): enrolled is an area-year value repeated on every ssd row, so
* these totals weight each area by its number of ssd rows - confirm intended.
bys anno area_mixed: egen double area_enr=total(enrolled)
bys anno: egen double tot_enr=total(enrolled)
gen ratio_enrolled=area_enr/tot_enr

* Promotions relative to the current stock, by rank and pooled.
gen r_k=(kass+kord)/(Nass+Nord)
gen r_kass=kass/Nass
gen r_kord=kord/Nord

su r*

* Lagged regressors (L. uses the xtset declared above).
sort sd anno
gen r_retiree=(L.retireeass+L.retireeord)/(L.Nass+L.Nord)
gen r_retireeass=L.retireeass/L.Nass
gen r_retireeord=L.retireeord/L.Nord
gen LmMassE =L.mMassE
gen LmMordE =L.mMordE
gen Lratio_enrolled=L.ratio_enrolled
gen Lr_southT=L.r_southT
gen Lr_south2=L.r_south2
gen Lr_south3=L.r_south3
drop _merge

* Add top10_new; keep master-only and matched rows, drop using-only rows.
merge 1:1 ssd anno using top10_new
keep if _merge==1 | _merge==3
sort sd anno
* NOTE(review): the weights here are the current-year Nass/Nord, whereas the
* by-period section of this file uses L.Nass/L.Nord - confirm which is meant.
gen LmME =((LmMassE*Nass)+(LmMordE*Nord))/2
gen Ltop10_new=L.top10
gen Lavage=L.avage 
gen Lav2age=L.av2age
gen Lav3age=L.av3age
gen Lshfemale=L.shfemale
gen Lsh2female=L.sh2female
gen Lsh3female=L.sh3female


* Tables A6-A7-A8 - By year 
* Fixed-effects regressions (xtreg, fe) on the ssd-year panel. Each table file
* is started by one outreg2 "replace" and extended by four "append" columns.

su r_kass r_kord r_k r_retireeass r_retireeord r_retiree LmMassE LmMordE LmME

* Pieces shared by the specifications below.
local yr_area i.anno##i.area_mixed
local fmt     noparen aster(coef) se bdec(3) nocons cti(1) label
local x_ass   Lavage Lav2age Lshfemale Lsh2female Lr_southT Lr_south2
local x_ord   Lavage Lav3age Lshfemale Lsh3female Lr_southT Lr_south3
local x_all   Lavage Lshfemale Lr_southT


* Associate Professors (tab A6)
xtreg r_kass r_retireeass LmMassE `yr_area', fe
outreg2 using tabA6.xls, keep(r_retireeass LmMassE) `fmt' replace
xtreg r_kass r_retireeass LmMassE `x_ass' `yr_area', fe
outreg2 using tabA6.xls, keep(r_retireeass LmMassE `x_ass') `fmt' append
xtreg r_kass r_retireeass LmMassE `x_ass' Lratio_enrolled `yr_area', fe
* keep() list written out: its order differs from the regressor order
outreg2 using tabA6.xls, keep(r_retireeass LmMassE Lavage Lav2age Lshfemale Lr_southT Lr_south2 Lsh2female Lratio_enrolled ) `fmt' append
* LmMassE left out of the regressors, but the sample is held to rows where it is non-missing
xtreg r_kass r_retireeass `x_ass' Lratio_enrolled Ltop10_new `yr_area' if LmMassE!=., fe
outreg2 using tabA6.xls, keep(r_retireeass `x_ass' Lratio_enrolled Ltop10_new) `fmt' append
xtreg r_kass r_retireeass LmMassE `x_ass' Lratio_enrolled Ltop10_new `yr_area', fe
* keep() list written out: its order differs from the regressor order
outreg2 using tabA6.xls, keep(r_retireeass LmMassE Lavage Lav2age Lshfemale Lr_southT Lr_south2 Lsh2female Lratio_enrolled Ltop10_new) `fmt' append


* Full Professors (tab A7)
xtreg r_kord r_retireeord LmMordE `yr_area', fe
outreg2 using tabA7.xls, keep(r_retireeord LmMordE) `fmt' replace
xtreg r_kord r_retireeord LmMordE `x_ord' `yr_area', fe
outreg2 using tabA7.xls, keep(r_retireeord LmMordE `x_ord') `fmt' append
xtreg r_kord r_retireeord LmMordE `x_ord' Lratio_enrolled `yr_area', fe
outreg2 using tabA7.xls, keep(r_retireeord LmMordE `x_ord' Lratio_enrolled) `fmt' append
* LmMordE left out of the regressors, sample held to rows where it is non-missing
xtreg r_kord r_retireeord `x_ord' Lratio_enrolled Ltop10_new `yr_area' if LmMordE!=., fe
outreg2 using tabA7.xls, keep(r_retireeord `x_ord' Lratio_enrolled Ltop10_new) `fmt' append
xtreg r_kord r_retireeord LmMordE `x_ord' Lratio_enrolled Ltop10_new `yr_area', fe
outreg2 using tabA7.xls, keep(r_retireeord LmMordE `x_ord' Lratio_enrolled Ltop10_new) `fmt' append

* All Associates and Full professors [k=kass+kord and r=r_ass+r_ord and LmME] (tab A8)

xtreg r_k r_retiree LmME `yr_area', fe
outreg2 using tabA8.xls, keep(r_retiree LmME) `fmt' replace
xtreg r_k r_retiree LmME `x_all' `yr_area', fe
outreg2 using tabA8.xls, keep(r_retiree LmME `x_all') `fmt' append
xtreg r_k r_retiree LmME `x_all' Lratio_enrolled `yr_area', fe
outreg2 using tabA8.xls, keep(r_retiree LmME `x_all' Lratio_enrolled) `fmt' append
* LmME left out of the regressors, sample held to rows where it is non-missing
xtreg r_k r_retiree `x_all' Lratio_enrolled Ltop10_new `yr_area' if LmME!=., fe
outreg2 using tabA8.xls, keep(r_retiree `x_all' Lratio_enrolled Ltop10_new) `fmt' append
xtreg r_k r_retiree LmME `x_all' Lratio_enrolled Ltop10_new `yr_area', fe
outreg2 using tabA8.xls, keep(r_retiree LmME `x_all' Lratio_enrolled Ltop10_new) `fmt' append


* Tables A6-A7-A8 - By period
* The ssd-year panel in memory is collapsed to ssd-period (counts summed,
* covariates averaged), ratios and one-period lags are rebuilt, and the same
* fixed-effects specifications are run with period in place of year.
* preserve/restore puts the ssd-year panel back in memory afterwards.
preserve
collapse (sum) kass kord Nass Nord retireeass retireeord enrolled tmp* south southass southord (mean) top10 area_mixed mMassE mMordE avage av2age av3age shfemale sh2female sh3female, by(ssd period)
encode ssd, gen(sd)
xtset sd period

* Enrolment share of the area within each period.
* FIX: the totals were previously built with "gen ... = sum(enrolled)", which
* in Stata is a RUNNING sum. Under bysort the order of rows inside a group is
* arbitrary, so every row received a different partial sum and the resulting
* ratio was not reproducible. egen total() assigns the group total to all rows.
* Stored as double because summed head counts can exceed float precision.
bys period area_mixed: egen double area_enr=total(enrolled)
bys period: egen double tot_enr=total(enrolled)
gen ratio_enrolled=area_enr/tot_enr

* Promotions relative to the current-period stock, by rank and pooled.
gen r_k=(kass+kord)/(Nass+Nord)
gen r_kass=kass/Nass
gen r_kord=kord/Nord

* One-period lags (L. uses the xtset declared above).
sort sd period
gen r_retiree=(L.retireeass+L.retireeord)/(L.Nass+L.Nord)
gen r_retireeass=L.retireeass/L.Nass
gen r_retireeord=L.retireeord/L.Nord
gen LmMassE =L.mMassE
gen LmMordE =L.mMordE
gen Lratio_enrolled=L.ratio_enrolled
gen LmME =((LmMassE*L.Nass)+(LmMordE*L.Nord))/2
gen Ltop10_new=L.top10
gen Lavage=L.avage 
gen Lav2age=L.av2age
gen Lav3age=L.av3age
gen Lshfemale=L.shfemale
gen Lsh2female=L.sh2female
gen Lsh3female=L.sh3female
* south shares are rebuilt from the summed counts, then lagged
gen Lr_southT=L.south/L.tmp
gen Lr_south2=(L.southass/L.tmpass)
gen Lr_south3=(L.southord/L.tmpord)

su r_kass r_kord r_k r_retireeass r_retireeord r_retiree LmMassE LmMordE LmME

* Associate Professors (tab A6)
xtreg r_kass r_retireeass LmMassE i.period##i.area_mixed , fe
outreg2 using tabA6_period.xls, keep(r_retireeass LmMassE) noparen aster(coef) se bdec(3) nocons cti(1) label replace
xtreg r_kass r_retireeass LmMassE Lavage Lav2age Lshfemale Lsh2female Lr_southT Lr_south2 i.period##i.area_mixed, fe  
outreg2 using tabA6_period.xls, keep(r_retireeass LmMassE Lavage Lav2age Lshfemale Lsh2female Lr_southT Lr_south2) noparen aster(coef) se bdec(3) nocons cti(1) label append
xtreg r_kass r_retireeass LmMassE Lavage Lav2age Lshfemale Lsh2female Lr_southT Lr_south2 Lratio_enrolled i.period##i.area_mixed, fe  
outreg2 using tabA6_period.xls, keep(r_retireeass LmMassE Lavage Lav2age Lshfemale Lsh2female Lr_southT Lr_south2 Lratio_enrolled ) noparen aster(coef) se bdec(3) nocons cti(1) label append
* LmMassE left out of the regressors, sample held to rows where it is non-missing
xtreg r_kass r_retireeass Lavage Lav2age Lshfemale Lsh2female Lr_southT Lr_south2 Lratio_enrolled Ltop10_new i.period##i.area_mixed if LmMassE!=., fe  
outreg2 using tabA6_period.xls, keep(r_retireeass Lavage Lav2age Lshfemale Lsh2female Lr_southT Lr_south2 Lratio_enrolled Ltop10_new) noparen aster(coef) se bdec(3) nocons cti(1) label append
xtreg r_kass r_retireeass LmMassE Lavage Lav2age Lshfemale Lsh2female Lr_southT Lr_south2 Lratio_enrolled Ltop10_new i.period##i.area_mixed, fe 
outreg2 using tabA6_period.xls, keep(r_retireeass LmMassE Lavage Lav2age Lshfemale Lsh2female Lr_southT Lr_south2 Lratio_enrolled Ltop10_new) noparen aster(coef) se bdec(3) nocons cti(1) label append

* Full Professors (tab A7)
xtreg r_kord r_retireeord LmMordE i.period##i.area_mixed, fe
outreg2 using tabA7_period.xls, keep(r_retireeord LmMordE) noparen aster(coef) se bdec(3) nocons cti(1) label replace
xtreg r_kord r_retireeord LmMordE Lavage Lav3age Lshfemale Lsh3female Lr_southT Lr_south3 i.period##i.area_mixed, fe  
outreg2 using tabA7_period.xls, keep(r_retireeord LmMordE Lavage Lav3age Lshfemale Lsh3female Lr_southT Lr_south3) noparen aster(coef) se bdec(3) nocons cti(1) label append
xtreg r_kord r_retireeord LmMordE Lavage Lav3age Lshfemale Lsh3female Lr_southT Lr_south3 Lratio_enrolled  i.period##i.area_mixed, fe  
outreg2 using tabA7_period.xls, keep(r_retireeord LmMordE Lavage Lav3age Lshfemale Lsh3female Lr_southT Lr_south3 Lratio_enrolled) noparen aster(coef) se bdec(3) nocons cti(1) label append
* LmMordE left out of the regressors, sample held to rows where it is non-missing
xtreg r_kord r_retireeord Lavage Lav3age Lshfemale Lsh3female Lr_southT Lr_south3 Lratio_enrolled Ltop10_new i.period##i.area_mixed if LmMordE!=., fe  
outreg2 using tabA7_period.xls, keep(r_retireeord Lavage Lav3age Lshfemale Lsh3female Lr_southT Lr_south3 Lratio_enrolled Ltop10_new) noparen aster(coef) se bdec(3) nocons cti(1) label append
xtreg r_kord r_retireeord LmMordE Lavage Lav3age Lshfemale Lsh3female Lr_southT Lr_south3 Lratio_enrolled Ltop10_new i.period##i.area_mixed, fe 
outreg2 using tabA7_period.xls, keep(r_retireeord LmMordE Lavage Lav3age Lshfemale Lsh3female Lr_southT Lr_south3 Lratio_enrolled Ltop10_new) noparen aster(coef) se bdec(3) nocons cti(1) label append


* All Associates and Full professors [k=kass+kord and r=r_ass+r_ord and LmME] (tab A8)
 
xtreg r_k r_retiree LmME i.period##i.area_mixed, fe
outreg2 using tabA8_period.xls, keep(r_retiree LmME) noparen aster(coef) se bdec(3) nocons cti(1) label replace
xtreg r_k r_retiree LmME Lavage Lshfemale Lr_southT i.period##i.area_mixed, fe  
outreg2 using tabA8_period.xls, keep(r_retiree LmME Lavage Lshfemale Lr_southT) noparen aster(coef) se bdec(3) nocons cti(1) label append
xtreg r_k r_retiree LmME Lavage Lshfemale Lr_southT Lratio_enrolled i.period##i.area_mixed, fe  
outreg2 using tabA8_period.xls, keep(r_retiree LmME Lavage Lshfemale Lr_southT Lratio_enrolled ) noparen aster(coef) se bdec(3) nocons cti(1) label append
* LmME left out of the regressors, sample held to rows where it is non-missing
xtreg r_k r_retiree Lavage Lshfemale Lr_southT Lratio_enrolled Ltop10_new i.period##i.area_mixed if LmME!=., fe  
outreg2 using tabA8_period.xls, keep(r_retiree Lavage Lshfemale Lr_southT Lratio_enrolled Ltop10_new) noparen aster(coef) se bdec(3) nocons cti(1) label append
xtreg r_k r_retiree LmME Lavage Lshfemale Lr_southT Lratio_enrolled Ltop10_new i.period##i.area_mixed, fe 
outreg2 using tabA8_period.xls, keep(r_retiree LmME Lavage Lshfemale Lr_southT Lratio_enrolled Ltop10_new) noparen aster(coef) se bdec(3) nocons cti(1) label append
restore 


* Regressions for footnote 25 pag. 37 
* Starts again from regFinal, collapses to ssd x rank x period and runs two
* OLS regressions of mMassE / mMordE on sd_ord, boss and period dummies.
use regFinal, clear

* Rank code: 3 when fascord>0, otherwise 2 when fascass>0, otherwise 1 when
* both are exactly zero; anything else stays missing. Note that in Stata a
* missing value also satisfies ">0".
gen fascia = cond(fascord>0, 3, cond(fascass>0, 2, cond(fascass==0 & fascord==0, 1, .)))

* Head count of each ssd-period-rank cell.
egen NN = count(id_prof) , by(ssd period fascia)

* For each rank: sum NN over the period-rank cell (defined on that rank's rows
* only), then spread the value to every row of the ssd-period.
local code = 3
foreach rk in ORD ASS RIC {
    bys period fascia: egen N`rk' = total(NN) if fascia==`code'
    local --code
}
foreach rk in ORD ASS RIC {
    bys ssd period: egen NN`rk' = min(N`rk')
}
gen boss = NNORD/(NNASS+NNRIC+NNORD)

* ioutput1e is forced to 1 on the rows below, so its standard deviation in
* the collapse is zero for any cell made up only of such rows.
replace ioutput1e=1 if fascass==1 | (fascass==0 & fascord==0)
replace ioutput1e = 1 if fascia !=3
collapse (sd) ioutput1e (mean) boss mMassE mMordE (sum) Nass Nord, by(ssd fascia period)
bys ssd period: egen sd_ord = max(ioutput1e) 

* NOTE(review): the column titled "Assoc." is estimated on fascia==1 rows and
* the one titled "Full" on fascia==2 rows - confirm this selection is intended.
local fmt noparen aster(coef) se bdec(3) nocons
reg mMassE sd_ord boss i.period if fascia == 1
outreg2 using table_restat_R2_FS_period.xls, `fmt' cti("Assoc.") label replace
reg mMordE sd_ord boss i.period if fascia == 2
outreg2 using table_restat_R2_FS_period.xls, `fmt' cti("Full") label append

